#‘Metabolism of A Noodle Restaurant about Food Loss and Food Waste: Micro-Level Material Flow Model and Tobit Regression Analysis’
# Print the first six rows of df to check column names and example values.
head(df)
## date day is_closed food_loss_kg food_waste_kg solid_waste_kg
## 1 2022-09-16 Fri FALSE 9.50 6.55 2.50
## 2 2022-09-17 Sat FALSE 12.25 2.80 0.60
## 3 2022-09-18 Sun FALSE 6.50 3.25 0.85
## 4 2022-09-20 Tue FALSE 13.10 0.70 0.30
## 5 2022-09-21 Wed FALSE 5.70 1.10 0.45
## 6 2022-09-22 Thu FALSE 7.25 0.80 0.35
## liquid_waste_kg customers fulls halfs takeouts liquors sales container
## 1 4.05 42 36 4 15 2 1080.48 0
## 2 2.20 42 30 6 12 2 861.76 0
## 3 2.40 27 24 2 10 1 629.49 0
## 4 0.40 13 10 2 12 4 635.33 0
## 5 0.65 15 10 3 10 1 533.32 0
## 6 0.45 14 10 2 16 1 680.46 0
## temp_c humi_p prcp_mm TS_noodle_kg TS_water_kg TS_bones_kg TS_veg_kg
## 1 9.04 89.5 4.1 -7.950 -34.450 -8.7450 -4.982
## 2 7.00 92.5 1.0 -6.750 -29.250 -7.4250 -4.230
## 3 9.61 81.1 0.0 -5.250 -22.750 -5.7750 -3.290
## 4 5.66 74.1 0.0 -3.450 -14.950 -3.7950 -2.162
## 5 7.35 76.7 0.0 -3.225 -13.975 -3.5475 -2.021
## 6 10.78 66.7 0.0 -4.050 -17.550 -4.4550 -2.538
## TS_meat_kg TS_condi_kg TS_Broth_kg TS_Stock_kg TS_FL_kg TS_FL_bone_kg
## 1 -2.12 -0.7950 1.029300e-15 29.68 11.342 -8.7450
## 2 -1.80 -0.6750 -1.110223e-15 25.20 9.630 -7.4250
## 3 -1.40 -0.5250 1.110223e-15 19.60 7.490 -5.7750
## 4 -0.92 -0.3450 -7.569254e-16 12.88 4.922 -3.7950
## 5 -0.86 -0.3225 -1.514838e-16 12.04 4.601 -3.5475
## 6 -1.08 -0.4050 3.128362e-16 15.12 5.778 -4.4550
## TS_FL_veg_kg TS_FL_meat_kg TS_FP_kg FL_noodle_kg FL_water_kg FL_bones_kg
## 1 -2.332 -0.2650 47.70 -6.658879 -28.85514 -7.324766
## 2 -1.980 -0.2250 40.50 -8.586449 -37.20794 -9.445093
## 3 -1.540 -0.1750 31.50 -4.556075 -19.74299 -5.011682
## 4 -1.012 -0.1150 20.70 -9.182243 -39.78972 -10.100467
## 5 -0.946 -0.1075 19.35 -3.995327 -17.31308 -4.394860
## 6 -1.188 -0.1350 24.30 -5.081776 -22.02103 -5.589953
## FL_veg_kg FL_meat_kg FL_condi_kg FL_Broth_kg FL_Stock_kg FL_FL_kg
## 1 -4.172897 -1.775701 -0.6658879 -1.332268e-15 24.85981 9.50
## 2 -5.380841 -2.289720 -0.8586449 1.554312e-15 32.05607 12.25
## 3 -2.855140 -1.214953 -0.4556075 1.332268e-15 17.00935 6.50
## 4 -5.754206 -2.448598 -0.9182243 1.373771e-15 34.28037 13.10
## 5 -2.503738 -1.065421 -0.3995327 8.674266e-16 14.91589 5.70
## 6 -3.184579 -1.355140 -0.5081776 -1.110223e-15 18.97196 7.25
## FL_FL_bone_kg FL_FL_veg_kg FL_FL_meat_kg FL_FP_kg Broth_diff Final_Prod_diff
## 1 -7.324766 -1.953271 -0.2219626 39.95327 -4.820187 -7.746729
## 2 -9.445093 -2.518692 -0.2862150 51.51869 6.856075 11.018692
## 3 -5.011682 -1.336449 -0.1518692 27.33645 -2.590654 -4.163551
## 4 -10.100467 -2.693458 -0.3060748 55.09346 21.400374 34.393458
## 5 -4.394860 -1.171963 -0.1331776 23.97196 2.875888 4.621963
## 6 -5.589953 -1.490654 -0.1693925 30.49065 3.851963 6.190654
## daily_total_served tueD wedD thuD friD satD tueE wedE thuE friE satE
## 1 47.70 0 0 0 1 0 0 0 0 1 0
## 2 40.50 0 0 0 0 1 0 0 0 0 1
## 3 31.50 0 0 0 0 0 -1 -1 -1 -1 -1
## 4 20.70 1 0 0 0 0 1 0 0 0 0
## 5 19.35 0 1 0 0 0 0 1 0 0 0
## 6 24.30 0 0 1 0 0 0 0 1 0 0
# Show the structure of df: number of observations and the type of each column.
str(df)
## 'data.frame': 169 obs. of 56 variables:
## $ date : chr "2022-09-16" "2022-09-17" "2022-09-18" "2022-09-20" ...
## $ day : chr "Fri" "Sat" "Sun" "Tue" ...
## $ is_closed : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ food_loss_kg : num 9.5 12.2 6.5 13.1 5.7 ...
## $ food_waste_kg : num 6.55 2.8 3.25 0.7 1.1 0.8 1.5 2.65 2.55 2.2 ...
## $ solid_waste_kg : num 2.5 0.6 0.85 0.3 0.45 0.35 0.65 0.7 0.8 0.8 ...
## $ liquid_waste_kg : num 4.05 2.2 2.4 0.4 0.65 0.45 0.85 1.95 1.75 1.4 ...
## $ customers : int 42 42 27 13 15 14 12 35 24 26 ...
## $ fulls : int 36 30 24 10 10 10 11 35 18 25 ...
## $ halfs : int 4 6 2 2 3 2 2 2 3 3 ...
## $ takeouts : int 15 12 10 12 10 16 28 23 25 13 ...
## $ liquors : int 2 2 1 4 1 1 2 3 6 3 ...
## $ sales : num 1080 862 629 635 533 ...
## $ container : int 0 0 0 0 0 0 0 0 0 0 ...
## $ temp_c : num 9.04 7 9.61 5.66 7.35 ...
## $ humi_p : num 89.5 92.5 81.1 74.1 76.7 66.7 75.6 71.3 70.1 74.7 ...
## $ prcp_mm : num 4.1 1 0 0 0 0 0 0 0 0 ...
## $ TS_noodle_kg : num -7.95 -6.75 -5.25 -3.45 -3.23 ...
## $ TS_water_kg : num -34.5 -29.2 -22.8 -14.9 -14 ...
## $ TS_bones_kg : num -8.74 -7.42 -5.78 -3.79 -3.55 ...
## $ TS_veg_kg : num -4.98 -4.23 -3.29 -2.16 -2.02 ...
## $ TS_meat_kg : num -2.12 -1.8 -1.4 -0.92 -0.86 -1.08 -1.6 -2.36 -1.78 -1.58 ...
## $ TS_condi_kg : num -0.795 -0.675 -0.525 -0.345 -0.323 ...
## $ TS_Broth_kg : num 1.03e-15 -1.11e-15 1.11e-15 -7.57e-16 -1.51e-16 ...
## $ TS_Stock_kg : num 29.7 25.2 19.6 12.9 12 ...
## $ TS_FL_kg : num 11.34 9.63 7.49 4.92 4.6 ...
## $ TS_FL_bone_kg : num -8.74 -7.42 -5.78 -3.79 -3.55 ...
## $ TS_FL_veg_kg : num -2.332 -1.98 -1.54 -1.012 -0.946 ...
## $ TS_FL_meat_kg : num -0.265 -0.225 -0.175 -0.115 -0.107 ...
## $ TS_FP_kg : num 47.7 40.5 31.5 20.7 19.4 ...
## $ FL_noodle_kg : num -6.66 -8.59 -4.56 -9.18 -4 ...
## $ FL_water_kg : num -28.9 -37.2 -19.7 -39.8 -17.3 ...
## $ FL_bones_kg : num -7.32 -9.45 -5.01 -10.1 -4.39 ...
## $ FL_veg_kg : num -4.17 -5.38 -2.86 -5.75 -2.5 ...
## $ FL_meat_kg : num -1.78 -2.29 -1.21 -2.45 -1.07 ...
## $ FL_condi_kg : num -0.666 -0.859 -0.456 -0.918 -0.4 ...
## $ FL_Broth_kg : num -1.33e-15 1.55e-15 1.33e-15 1.37e-15 8.67e-16 ...
## $ FL_Stock_kg : num 24.9 32.1 17 34.3 14.9 ...
## $ FL_FL_kg : num 9.5 12.2 6.5 13.1 5.7 ...
## $ FL_FL_bone_kg : num -7.32 -9.45 -5.01 -10.1 -4.39 ...
## $ FL_FL_veg_kg : num -1.95 -2.52 -1.34 -2.69 -1.17 ...
## $ FL_FL_meat_kg : num -0.222 -0.286 -0.152 -0.306 -0.133 ...
## $ FL_FP_kg : num 40 51.5 27.3 55.1 24 ...
## $ Broth_diff : num -4.82 6.86 -2.59 21.4 2.88 ...
## $ Final_Prod_diff : num -7.75 11.02 -4.16 34.39 4.62 ...
## $ daily_total_served: num 47.7 40.5 31.5 20.7 19.4 ...
## $ tueD : int 0 0 0 1 0 0 0 0 0 1 ...
## $ wedD : int 0 0 0 0 1 0 0 0 0 0 ...
## $ thuD : int 0 0 0 0 0 1 0 0 0 0 ...
## $ friD : int 1 0 0 0 0 0 1 0 0 0 ...
## $ satD : int 0 1 0 0 0 0 0 1 0 0 ...
## $ tueE : int 0 0 -1 1 0 0 0 0 -1 1 ...
## $ wedE : int 0 0 -1 0 1 0 0 0 -1 0 ...
## $ thuE : int 0 0 -1 0 0 1 0 0 -1 0 ...
## $ friE : int 1 0 -1 0 0 0 1 0 -1 0 ...
## $ satE : int 0 1 -1 0 0 0 0 1 -1 0 ...
# List all column names of df together with their positions.
names(df)
## [1] "date" "day" "is_closed"
## [4] "food_loss_kg" "food_waste_kg" "solid_waste_kg"
## [7] "liquid_waste_kg" "customers" "fulls"
## [10] "halfs" "takeouts" "liquors"
## [13] "sales" "container" "temp_c"
## [16] "humi_p" "prcp_mm" "TS_noodle_kg"
## [19] "TS_water_kg" "TS_bones_kg" "TS_veg_kg"
## [22] "TS_meat_kg" "TS_condi_kg" "TS_Broth_kg"
## [25] "TS_Stock_kg" "TS_FL_kg" "TS_FL_bone_kg"
## [28] "TS_FL_veg_kg" "TS_FL_meat_kg" "TS_FP_kg"
## [31] "FL_noodle_kg" "FL_water_kg" "FL_bones_kg"
## [34] "FL_veg_kg" "FL_meat_kg" "FL_condi_kg"
## [37] "FL_Broth_kg" "FL_Stock_kg" "FL_FL_kg"
## [40] "FL_FL_bone_kg" "FL_FL_veg_kg" "FL_FL_meat_kg"
## [43] "FL_FP_kg" "Broth_diff" "Final_Prod_diff"
## [46] "daily_total_served" "tueD" "wedD"
## [49] "thuD" "friD" "satD"
## [52] "tueE" "wedE" "thuE"
## [55] "friE" "satE"
# sample size: open and closed days ---------------------------------------
# is_closed is TRUE on days the restaurant was closed, so closed days are
# counted with sum(is_closed) and open days with sum(!is_closed).
data.frame(
  obs_days = nrow(df),
  open_days = sum(!df$is_closed),
  closed_days = sum(df$is_closed)
)
## obs_days open_days closed_days
## 1 169 161 8
# Frequency table (count and percentage) of open vs. closed days.
freq_table(df, is_closed)
## # A tibble: 2 × 3
## is_closed n prop
## <lgl> <int> <dbl>
## 1 FALSE 161 95.3
## 2 TRUE 8 4.7
# List the date and weekday of every closed day (original row numbers kept).
subset(df, is_closed == TRUE, select = c(date, day, is_closed))
## date day is_closed
## 21 2022-10-09 Sun TRUE
## 48 2022-11-10 Thu TRUE
## 49 2022-11-11 Fri TRUE
## 66 2022-12-01 Thu TRUE
## 86 2022-12-24 Sat TRUE
## 87 2022-12-25 Sun TRUE
## 93 2023-01-01 Sun TRUE
## 159 2023-03-19 Sun TRUE
# basic summary: dependents ----------------------------------------------------
# Six-number summaries (min, quartiles, mean, max) of each dependent variable,
# computed over all observed days; one column per variable.
with(
  df,
  data.frame(
    food_loss_waste = c(summary(food_loss_kg + food_waste_kg)),
    food_loss = c(summary(food_loss_kg)),
    food_waste_all = c(summary(food_waste_kg)),
    food_waste_liquid = c(summary(liquid_waste_kg)),
    food_waste_solid = c(summary(solid_waste_kg))
  )
)
## food_loss_waste food_loss food_waste_all food_waste_liquid
## Min. 0.000000 0.000000 0.000000 0.000000
## 1st Qu. 8.250000 6.600000 0.950000 0.550000
## Median 9.500000 7.300000 1.950000 1.400000
## Mean 9.543491 7.460355 2.083136 1.408876
## 3rd Qu. 11.050000 8.150000 2.900000 2.000000
## Max. 17.900000 13.800000 6.550000 4.500000
## food_waste_solid
## Min. 0.0000000
## 1st Qu. 0.3500000
## Median 0.6000000
## Mean 0.6742604
## 3rd Qu. 0.9000000
## Max. 2.9500000
# rstatix summary statistics of the four dependent variables, all days.
get_summary_stats(
  select(df, food_loss_kg, food_waste_kg, liquid_waste_kg, solid_waste_kg)
)
## # A tibble: 4 × 13
## variable n min max median q1 q3 iqr mad mean sd se
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 food_loss_… 169 0 13.8 7.3 6.6 8.15 1.55 1.19 7.46 2.69 0.207
## 2 food_waste… 169 0 6.55 1.95 0.95 2.9 1.95 1.48 2.08 1.45 0.111
## 3 liquid_was… 169 0 4.5 1.4 0.55 2 1.45 1.04 1.41 1.02 0.079
## 4 solid_wast… 169 0 2.95 0.6 0.35 0.9 0.55 0.445 0.674 0.51 0.039
## # ℹ 1 more variable: ci <dbl>
library(summarytools)
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
## system might not have X11 capabilities; in case of errors when using dfSummary(), set st_options(use.x11 = FALSE)
##
## Attaching package: 'summarytools'
## The following object is masked from 'package:tibble':
##
## view
# Descriptive statistics of the four dependent variables over all days.
# Only the statistics listed in `stats` are reported, rounded to six digits.
# NOTE(review): order = "preserve" presumably keeps the columns in the order
# selected here rather than sorting them - confirm against summarytools docs.
df %>%
select(c(food_loss_kg,food_waste_kg,
liquid_waste_kg,solid_waste_kg)) %>%
descr(order = "preserve",
stats = c('mean', 'sd', 'min', 'q1', 'med', 'q3', 'max'),
round.digits = 6)
## Descriptive Statistics
## df
## N: 169
##
## food_loss_kg food_waste_kg liquid_waste_kg solid_waste_kg
## ------------- -------------- --------------- ----------------- ----------------
## Mean 7.460355 2.083136 1.408876 0.674260
## Std.Dev 2.693018 1.445795 1.021296 0.509818
## Min 0.000000 0.000000 0.000000 0.000000
## Q1 6.600000 0.950000 0.550000 0.350000
## Median 7.300000 1.950000 1.400000 0.600000
## Q3 8.150000 2.900000 2.000000 0.900000
## Max 13.800000 6.550000 4.500000 2.950000
# basic summary: dependents excluding closed days ------------------------------
# Same six-number summaries as above, restricted to rows where the
# restaurant was open (is_closed is FALSE).
with(
  df[!df$is_closed, ],
  data.frame(
    food_loss_waste = c(summary(food_loss_kg + food_waste_kg)),
    food_loss = c(summary(food_loss_kg)),
    food_waste_all = c(summary(food_waste_kg)),
    food_waste_liquid = c(summary(liquid_waste_kg)),
    food_waste_solid = c(summary(solid_waste_kg))
  )
)
## food_loss_waste food_loss food_waste_all food_waste_liquid
## Min. 0.0000 0.000000 0.000000 0.000000
## 1st Qu. 8.4000 6.700000 1.100000 0.650000
## Median 9.6500 7.350000 2.100000 1.500000
## Mean 10.0177 7.831056 2.186646 1.478882
## 3rd Qu. 11.1500 8.400000 2.950000 2.050000
## Max. 17.9000 13.800000 6.550000 4.500000
## food_waste_solid
## Min. 0.000000
## 1st Qu. 0.350000
## Median 0.650000
## Mean 0.707764
## 3rd Qu. 0.950000
## Max. 2.950000
# rstatix summary statistics of the four dependent variables, open days only.
df %>%
  filter(!is_closed) %>%
  select(food_loss_kg, food_waste_kg, liquid_waste_kg, solid_waste_kg) %>%
  get_summary_stats()
## # A tibble: 4 × 13
## variable n min max median q1 q3 iqr mad mean sd se
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 food_loss_… 161 0 13.8 7.35 6.7 8.4 1.7 1.11 7.83 2.17 0.171
## 2 food_waste… 161 0 6.55 2.1 1.1 2.95 1.85 1.33 2.19 1.40 0.111
## 3 liquid_was… 161 0 4.5 1.5 0.65 2.05 1.4 1.04 1.48 0.995 0.078
## 4 solid_wast… 161 0 2.95 0.65 0.35 0.95 0.6 0.445 0.708 0.499 0.039
## # ℹ 1 more variable: ci <dbl>
# Summary of the dependent variables over ALL observed days -----------------
# (closed days included; the open-days-only table is written to deps2.txt)
# 1. number of observations
# 2. averages
# 3. standard deviations
# 4. min values
# 5. max values
# Columns are selected by name instead of by position (4:7) so the table does
# not silently change if the column order of df changes.
stargazer(
  df[c("food_loss_kg", "food_waste_kg", "solid_waste_kg", "liquid_waste_kg")],
  flip = TRUE, type = "text", digits = 2, out = "deps1.txt"
)
##
## ===================================================================
## Statistic food_loss_kg food_waste_kg solid_waste_kg liquid_waste_kg
## -------------------------------------------------------------------
## N 169 169 169 169
## Mean 7.46 2.08 0.67 1.41
## St. Dev. 2.69 1.45 0.51 1.02
## Min 0.00 0.00 0.00 0.00
## Max 13.80 6.55 2.95 4.50
## -------------------------------------------------------------------
# Same summary table restricted to open days; written to deps2.txt ----------
stargazer(
  subset(df, is_closed == FALSE, select = food_loss_kg:liquid_waste_kg),
  flip = TRUE, type = "text", digits = 2, out = "deps2.txt"
)
##
## ===================================================================
## Statistic food_loss_kg food_waste_kg solid_waste_kg liquid_waste_kg
## -------------------------------------------------------------------
## N 161 161 161 161
## Mean 7.83 2.19 0.71 1.48
## St. Dev. 2.17 1.40 0.50 1.00
## Min 0.00 0.00 0.00 0.00
## Max 13.80 6.55 2.95 4.50
## -------------------------------------------------------------------
# Collect the closed-day flag together with the four dependent variables.
dep_features <- df %>%
  select(is_closed, food_loss_kg, food_waste_kg,
         solid_waste_kg, liquid_waste_kg)

# Pivot to long format (one row per day and feature) and attach the mean and
# median of every feature. The grouping is dropped again afterwards so that
# later operations on dep_features are not silently applied per group.
dep_features <- dep_features %>%
  pivot_longer(!is_closed, names_to = "features", values_to = "values") %>%
  group_by(features) %>%
  mutate(Mean = mean(values),
         Median = median(values)) %>%
  ungroup()

# Plot one histogram per feature (all days, closed days included), with
# dashed vertical lines marking the mean and the median.
dep_features %>%
  ggplot() +
  geom_histogram(aes(x = values, fill = features),
                 bins = 100, alpha = 0.7, show.legend = FALSE) +
  facet_wrap(~ features, scales = "free") +
  paletteer::scale_fill_paletteer_d("ggthemes::excel_Parallax") +
  # Add lines for mean and median
  geom_vline(aes(xintercept = Mean, color = "Mean"),
             linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = Median, color = "Median"),
             linetype = "dashed", linewidth = 1) +
  scale_color_manual(name = "",
                     values = c(Mean = "red", Median = "yellow"))
# Unused alternative: a bin width from the Freedman-Diaconis rule
# binwidth = bw
# bw <- 2 * IQR(df$food_loss_kg) / length(df$food_loss_kg)^(1/3)
# Density histograms of the dependent variables (open days only) ---------------
# Every plot shows a 30-bin histogram on the density scale with a kernel
# density estimate drawn on top.

# food loss + food waste
hist_loss_waste <-
  ggplot(subset(df, !is_closed), aes(x = food_loss_kg + food_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Loss and Food Waste - Histogram")
hist_loss_waste

# food loss
hist_loss <-
  ggplot(subset(df, !is_closed), aes(x = food_loss_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Loss - Histogram")
hist_loss

# food waste (solid + liquid)
hist_food_waste <-
  ggplot(subset(df, !is_closed), aes(x = food_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Food Waste - Histogram")
hist_food_waste

# solid food waste
hist_solid_waste <-
  ggplot(subset(df, !is_closed), aes(x = solid_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Solid Food Waste - Histogram")
hist_solid_waste

# liquid food waste
hist_liquid_waste <-
  ggplot(subset(df, !is_closed), aes(x = liquid_waste_kg)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  ggtitle("Liquid Food Waste - Histogram")
hist_liquid_waste

# All five histograms on one page
grid.arrange(hist_loss_waste, hist_loss, hist_food_waste,
             hist_solid_waste, hist_liquid_waste)
### Normal Q-Q plots of the dependent variables (open days only)
# Each plot compares the sample quantiles with the theoretical normal
# quantiles and adds the reference line.

# Food loss ------------
ggplot(subset(df, !is_closed), aes(sample = food_loss_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Food Loss in kg")

# Food waste ------------
ggplot(subset(df, !is_closed), aes(sample = food_waste_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Food Waste in kg")

# Solid food waste ------------
ggplot(subset(df, !is_closed), aes(sample = solid_waste_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Solid Food Waste in kg")

# Liquid food waste ------------
ggplot(subset(df, !is_closed), aes(sample = liquid_waste_kg)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Liquid Food Waste in kg")
# Shapiro-Wilk normality tests for the three food waste series (open days) ----
shapiro_test(
  filter(df, !is_closed),
  food_waste_kg, solid_waste_kg, liquid_waste_kg
)
## # A tibble: 3 × 3
## variable statistic p
## <chr> <dbl> <dbl>
## 1 food_waste_kg 0.952 0.0000260
## 2 liquid_waste_kg 0.951 0.0000192
## 3 solid_waste_kg 0.903 0.00000000783
From the output, all p-values are far below 0.05, implying that the distributions of the data differ significantly from a normal distribution. In other words, we cannot assume normality.
# Density histograms of food waste PER CUSTOMER (open days only) ------------
# The plotted values are kg divided by the number of customers on that day,
# so the titles say "per Customer" to distinguish these plots from the
# histograms of the daily totals.
# NOTE: these assignments overwrite hist_food_waste, hist_solid_waste and
# hist_liquid_waste, which previously held the daily-total histograms.

# food waste per customer
hist_food_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = food_waste_kg / customers)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  labs(title = "Food Waste per Customer - Histogram")
hist_food_waste

# solid food waste per customer
hist_solid_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = solid_waste_kg / customers)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  labs(title = "Solid Food Waste per Customer - Histogram")
hist_solid_waste

# liquid food waste per customer
hist_liquid_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = liquid_waste_kg / customers)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, colour = 1, fill = "white") +
  geom_density(linewidth = 1.5, colour = 4, fill = 4, alpha = 0.15) +
  labs(title = "Liquid Food Waste per Customer - Histogram")
hist_liquid_waste

# The first two panels are still the daily-total histograms; the last three
# are the per-customer versions created above.
grid.arrange(hist_loss_waste, hist_loss,
             hist_food_waste, hist_solid_waste, hist_liquid_waste)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:forecast':
##
## gghistogram
# Normal Q-Q plots of food waste per customer (open days only) ------------------

# Food waste ------------
ggqqplot(with(subset(df, is_closed %in% FALSE), food_waste_kg / customers)) +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Food Waste per Capita in kg")

# Solid food waste ------------
ggqqplot(with(subset(df, is_closed %in% FALSE), solid_waste_kg / customers)) +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Solid Food Waste per Capita in kg")

# Liquid food waste ------------
ggqqplot(with(subset(df, is_closed %in% FALSE), liquid_waste_kg / customers)) +
  labs(x = "theoretical", y = "sample",
       title = "QQ plot of Liquid Food Waste per Capita in kg")
# Shapiro-Wilk normality tests for food waste per customer (open days) ---------
# The per-customer series are derived on the fly and are not stored in df.
df %>%
  filter(!is_closed) %>%
  mutate(
    food_waste_p_kg = food_waste_kg / customers,
    solid_waste_p_kg = solid_waste_kg / customers,
    liquid_waste_p_kg = liquid_waste_kg / customers
  ) %>%
  shapiro_test(food_waste_p_kg, solid_waste_p_kg, liquid_waste_p_kg)
## # A tibble: 3 × 3
## variable statistic p
## <chr> <dbl> <dbl>
## 1 food_waste_p_kg 0.987 1.38e- 1
## 2 liquid_waste_p_kg 0.984 6.10e- 2
## 3 solid_waste_p_kg 0.863 6.24e-11
From the output, the p-value for solid food waste per customer is far below the significance level of 0.05, but the others are not. This implies that the distribution of solid food waste per customer differs significantly from a normal distribution. In other words, we can assume normality for food waste and liquid food waste per customer, but not for solid food waste per customer.
# Daily trend plots (all days) -------------------------------------------------
# Each plot draws the daily series as a line and marks every day with a
# point whose shape tells open days (16) from closed days (4).

# food loss + food waste
daily_loss_waste <-
  ggplot(df, aes(x = as.Date(date), y = food_loss_kg + food_waste_kg)) +
  geom_line(aes(group = 1), color = "dark blue") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values = c(16, 4)) +
  scale_x_date(date_labels = "%b %d") +
  theme(legend.position = c(0.05, 0.15)) +
  labs(x = "Date", y = "Daily Food Loss and Waste (kg)",
       title = "Daily Food Loss and Waste Trend")
daily_loss_waste

# food loss
daily_loss <-
  ggplot(df, aes(x = as.Date(date), y = food_loss_kg)) +
  geom_line(color = "blue") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = c(0.9, 0.85)) +
  labs(x = "Date", y = "Daily Food Loss (kg)",
       title = "Daily Food Loss Trend")
daily_loss

# food waste
daily_waste <-
  ggplot(df, aes(x = as.Date(date), y = food_waste_kg)) +
  geom_line(color = "black") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = c(0.8, 0.85)) +
  labs(x = "Date", y = "Daily Food Waste (kg)",
       title = "Daily Food Waste Trend")
daily_waste

# solid food waste
daily_solid_waste <-
  ggplot(df, aes(x = as.Date(date), y = solid_waste_kg)) +
  geom_line(color = "dark orange") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = c(0.8, 0.85)) +
  labs(x = "Date", y = "Daily Solid Food Waste (kg)",
       title = "Daily Solid Food Waste Trend")
daily_solid_waste
# Daily Plot on liquid food waste ----------------------------------------
# Line plot of the daily liquid food waste with one point per day; the point
# shape tells open days (16) from closed days (4).
daily_liquid_waste <-
  ggplot(data = df, aes(x = as.Date(date), y = liquid_waste_kg)) +
  geom_line(color = "dark blue") +
  # NOTE(review): this second line overdraws the first one with blue dashes;
  # confirm that the two-tone line is intentional.
  geom_line(color = "blue", linetype = "dashed") +
  geom_point(aes(shape = is_closed)) +
  scale_x_date(date_labels = "%b %d") +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = c(0.8, 0.85)) +
  # Axis label typo fixed: "Liquid ood Waste" -> "Liquid Food Waste"
  xlab("Date") + ylab("Daily Liquid Food Waste (kg)") +
  ggtitle("Daily Liquid Food Waste Trend")
daily_liquid_waste

# All five daily trend plots on one page
grid.arrange(daily_loss_waste, daily_loss, daily_waste,
             daily_solid_waste, daily_liquid_waste)
# Monthly trend plots (all days), one facet row per month ----------------------
# NOTE(review): day_name and month_name are not among the columns printed by
# names(df) earlier in this file; presumably they are added to df elsewhere -
# confirm before running this section.

# food loss + food waste
monthly_loss_waste <-
  ggplot(df, aes(x = day_name, y = food_loss_kg + food_waste_kg, group = 1)) +
  geom_line(color = "dark blue") +
  geom_point(aes(shape = is_closed)) +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = "none") +
  # geom_rect(data = df, aes(xmin = date, xmax = dplyr::lead(date),
  # ymin = -Inf, ymax = Inf,
  # fill = factor(!is_closed)), alpha = .3) +
  facet_grid(month_name ~ .) +
  labs(x = "Date", y = "Monthly Food Loss and Waste (kg)",
       title = "Monthly Food Loss and Waste Trend")
monthly_loss_waste

# food loss
monthly_loss <-
  ggplot(df, aes(x = day_name, y = food_loss_kg, group = 1)) +
  geom_line(color = "black") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name ~ .) +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Monthly Food Loss (kg)",
       title = "Monthly Food Loss Trend")
monthly_loss

# food waste
monthly_waste <-
  ggplot(df, aes(x = day_name, y = food_waste_kg, group = 1)) +
  geom_line(color = "black") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name ~ .) +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Monthly Food Waste (kg)",
       title = "Monthly Food Waste Trend")
monthly_waste

# solid food waste
monthly_solid_waste <-
  ggplot(df, aes(x = day_name, y = solid_waste_kg, group = 1)) +
  geom_line(color = "dark orange") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name ~ .) +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = "none") +
  labs(x = "Date", y = "Monthly Solid Food Waste (kg)",
       title = "Monthly Solid Food Waste Trend")
monthly_solid_waste
# Monthly Plot on liquid food waste ----------------------------------------
# Liquid food waste by day, one facet row per month; point shape tells open
# days (16) from closed days (4).
# NOTE(review): day_name and month_name are not among the columns printed by
# names(df) earlier in this file; presumably they are added to df elsewhere.
monthly_liquid_waste <-
  ggplot(data = df, aes(x = day_name, y = liquid_waste_kg, group = 1)) +
  geom_line(color = "blue") +
  geom_point(aes(shape = is_closed)) +
  facet_grid(month_name ~ .) +
  scale_shape_manual(values = c(16, 4)) +
  theme(legend.position = "none") +
  # Axis label typo fixed: "Liquid ood Waste" -> "Liquid Food Waste"
  xlab("Date") + ylab("Monthly Liquid Food Waste (kg)") +
  ggtitle("Monthly Liquid Food Waste Trend")
monthly_liquid_waste
# grid.arrange(monthly_loss_waste,monthly_loss, monthly_waste,
# monthly_solid_waste,monthly_liquid_waste)
# Boxplots by day of the week (open days only) ----------------------------------
# Outliers are drawn as stars (shape 8) and the group mean as a filled dot.
# The x axis label is corrected from "Week of Day" to "Day of Week".
# NOTE: `day` is a character column, so the days appear in alphabetical
# order on the x axis, not in calendar order.

# food loss + food waste
boxplot_week_loss_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = food_loss_kg + food_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Food Loss and Food Waste in Day of the Week",
       x = "Day of Week", y = "Food Loss and Food Waste in kg")
boxplot_week_loss_waste

# food loss
boxplot_week_food_loss <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = food_loss_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Food Loss in Day of the Week",
       x = "Day of Week", y = "Food Loss in kg")
boxplot_week_food_loss

# food waste
boxplot_week_food_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = food_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of All Food Waste in Day of the Week",
       x = "Day of Week", y = "Food Waste in kg")
boxplot_week_food_waste

# solid food waste
boxplot_week_solidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = solid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Solid Food Waste in Day of the Week",
       x = "Day of Week", y = "Solid Food Waste in kg")
boxplot_week_solidWaste

# liquid food waste
boxplot_week_liquidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = day, y = liquid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 3) +
  labs(title = "Boxplot of Liquid Food Waste in Day of the Week",
       x = "Day of Week", y = "Liquid Food Waste in kg")
boxplot_week_liquidWaste

# Combined page; the food loss + food waste boxplot is not part of this grid.
grid.arrange(boxplot_week_food_loss, boxplot_week_food_waste,
             boxplot_week_solidWaste, boxplot_week_liquidWaste)
# Boxplots by month (open days only) ---------------------------------------------
# Outliers are drawn as stars (shape 8) and the group mean as a filled dot.
# Label fixes: the food loss plot was labelled "Food Waste in kg" on the y
# axis, and the solid waste plot had the x label "Monthy".
# NOTE(review): month_name is not among the columns printed by names(df)
# earlier in this file; presumably it is added to df elsewhere.

# food loss + food waste
boxplot_month_loss_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = food_loss_kg + food_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Food Loss and Food Waste in Month",
       x = "Month", y = "Food Loss and Waste in kg")
boxplot_month_loss_waste

# food loss
boxplot_month_loss <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = food_loss_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Food Loss in Month",
       x = "Month", y = "Food Loss in kg")
boxplot_month_loss

# food waste
boxplot_month_waste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = food_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Daily Food Waste in Month",
       x = "Month", y = "Food Waste in kg")
boxplot_month_waste

# solid food waste
boxplot_month_solidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = solid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Daily Solid Food Waste in Month",
       x = "Month", y = "Solid Food Waste in kg")
boxplot_month_solidWaste

# liquid food waste
boxplot_month_liquidWaste <-
  ggplot(data = subset(df, is_closed %in% FALSE),
         aes(x = month_name, y = liquid_waste_kg)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 2) +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2) +
  labs(title = "Boxplot of Liquid Food Waste in Month",
       x = "Month", y = "Liquid Food Waste in kg")
boxplot_month_liquidWaste

# All five monthly boxplots on one page
grid.arrange(boxplot_month_loss_waste, boxplot_month_loss, boxplot_month_waste,
             boxplot_month_solidWaste, boxplot_month_liquidWaste)
## Time series plots of the weather conditions (all days):
# temperature, temperature gap to 22 C, humidity and precipitation.
# Every plot shows the daily observations as points with a loess smoother.

# Time Series Plot on temperature ---------------------------------
tsPlot_temp <-
  ggplot(data = df, aes(x = as.Date(date), y = temp_c)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(aes(group = 1), color="orange") +
  # Reference line at 22 C. The intercept is passed as a constant rather than
  # through aes(), which would draw one overlapping line per data row.
  geom_hline(yintercept = 22, linetype = "dotted") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Temperature in Degree Celsius") +
  ggtitle("Daily Average Hourly Temperature Plot")
tsPlot_temp
## `geom_smooth()` using formula = 'y ~ x'
# Time Series Plot on gap temperature with 22C---------------------------------
# The gap is the daily temperature minus the 22 C reference level.
tsPlot_temp_gap <-
  ggplot(data = df, aes(x = as.Date(date), y = temp_c - 22)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(color="green") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Gap Temperature in Degree Celsius") +
  ggtitle("Daily Gap Temperature Plot")
tsPlot_temp_gap
## `geom_smooth()` using formula = 'y ~ x'
# Time Series Plot on humidity ---------------------------------
tsPlot_humidity <-
  ggplot(data = df, aes(x = as.Date(date), y = humi_p)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(color="red") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Humidity in Percent") +
  ggtitle("Daily Humidity Plot")
tsPlot_humidity
## `geom_smooth()` using formula = 'y ~ x'
# Time Series Plot on precipitation ---------------------------------
tsPlot_precip <-
  ggplot(data = df, aes(x = as.Date(date), y = prcp_mm)) +
  geom_point() +
  stat_smooth(method = "loess", color = "green", fill = "green") +
  # geom_line(color="blue") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Precipitation in millimetre") +
  ggtitle("Daily Precipitation Plot")
tsPlot_precip
## `geom_smooth()` using formula = 'y ~ x'
# All four weather plots on one page
grid.arrange(tsPlot_temp, tsPlot_temp_gap, tsPlot_humidity, tsPlot_precip)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Time series plots of the business variables (open days only):
# 1. number of meal orders (full, half, takeouts)
# 2. daily quantity served vs. produced (kg)
# 3. difference between served and produced
# 4. daily sales

# Meal orders: one line per package type; takeouts are drawn dashed -------
tsPlot_total_orders <-
  ggplot(subset(df, !is_closed), aes(x = as.Date(date))) +
  geom_line(aes(y = fulls, color = "fulls")) +
  geom_line(aes(y = halfs, color = "halfs")) +
  geom_line(aes(y = takeouts, color = "takeouts"), linetype = "dashed") +
  scale_x_date(date_labels = "%b %d") +
  scale_color_manual(
    name = "Packages",
    breaks = c("fulls", "halfs", "takeouts"),
    values = c("fulls" = "dark blue",
               "halfs" = "purple",
               "takeouts" = "dark red")
  ) +
  labs(x = "Date", y = "Daily Number of Meal Orders",
       title = "Daily Different Package Meal Orders Plot") +
  theme(legend.position = "right")
tsPlot_total_orders
# Time Series Plot on demand and production ---------------------------------
# Daily quantity served (solid) against the FL_FP_kg production series
# (dashed), open days only.
# The colours are mapped inside aes() so that scale_color_manual() applies
# and the "Served" legend is drawn; with constant colours set outside aes()
# the manual scale had nothing to act on and no legend appeared.
tsPlot_D_S <-
  ggplot(data = subset(df, is_closed %in% FALSE), aes(x = as.Date(date))) +
  geom_line(aes(y = daily_total_served, color = "daily_total_served")) +
  geom_line(aes(y = FL_FP_kg, color = "FL_FP_kg"), linetype = "dashed") +
  scale_x_date(date_labels = "%b %d") +
  xlab("Date") + ylab("Daily Quantity in kg") +
  ggtitle("Daily Total Served and Production Plot") +
  scale_color_manual(name = "Served",
                     breaks = c("daily_total_served", "FL_FP_kg"),
                     values = c("daily_total_served" = "dark blue",
                                "FL_FP_kg" = "dark red")) +
  theme(legend.position = "right")
tsPlot_D_S
# Difference between quantity served and quantity produced (open days) --------
# Drawn as a line with a loess smoother on top.
tsPlot_diff_D_S <-
  ggplot(subset(df, !is_closed), aes(x = as.Date(date))) +
  geom_line(aes(y = daily_total_served - FL_FP_kg), color = "black") +
  stat_smooth(aes(y = daily_total_served - FL_FP_kg), method = "loess",
              color = "light green", fill = "light green") +
  scale_x_date(date_labels = "%b %d") +
  labs(x = "Date", y = "Daily Inventory in kg",
       title = "Difference Between Total Served and Production Plot")
tsPlot_diff_D_S
## `geom_smooth()` using formula = 'y ~ x'
# Time Series Plot on daily sales ---------------------------------
tsPlot_sales <-
  ggplot(subset(df, !is_closed), aes(x = as.Date(date))) +
  geom_line(aes(y = sales), color = "purple") +
  scale_x_date(date_labels = "%b %d") +
  stat_smooth(aes(y = sales), method = "loess",
              color = "light green", fill = "light green") +
  labs(x = "Date", y = "Daily Sales in dollar",
       title = "Daily Sales Plot")
tsPlot_sales
## `geom_smooth()` using formula = 'y ~ x'
# All four business plots on one page
grid.arrange(tsPlot_total_orders, tsPlot_D_S, tsPlot_diff_D_S, tsPlot_sales)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## acf and pacf ----------------------------------------------------------------
# Autocorrelation (ggAcf) and partial autocorrelation (ggPacf) plots for each
# dependent variable. The series are taken from all rows of df, i.e. closed
# days are not filtered out here.
# NOTE(review): the plot titles presumably derive from the argument
# expression, so the calls are deliberately left untouched.
# acf and pacf for food loss ---------------------------------------------------
acf_fl <- ggAcf(as.ts(df$food_loss_kg))
pacf_fl <- ggPacf(as.ts(df$food_loss_kg))
# acf and pacf for all food waste ----------------------------------------------
acf_fw <- ggAcf(as.ts(df$food_waste_kg))
pacf_fw <- ggPacf(as.ts(df$food_waste_kg))
# acf and pacf for solid food waste --------------------------------------------
acf_sfw <- ggAcf(as.ts(df$solid_waste_kg))
pacf_sfw <- ggPacf(as.ts(df$solid_waste_kg))
# acf and pacf for liquid food waste -------------------------------------------
acf_lfw <- ggAcf(as.ts(df$liquid_waste_kg))
pacf_lfw <- ggPacf(as.ts(df$liquid_waste_kg))
# Show each ACF/PACF pair together, one page per variable.
grid.arrange(acf_fl,pacf_fl)
grid.arrange(acf_fw,pacf_fw)
grid.arrange(acf_sfw,pacf_sfw)
grid.arrange(acf_lfw,pacf_lfw)
# spectrum analysis: dominant period of each series -----------------------------
# For every series a periodogram is estimated with spec.pgram(spans = 2) and
# the dominant period is reported as 1 / frequency at the spectrum's maximum.

# food loss
# plot.spectrum(dt$allWasteKg)
raw.spec_fl <- list(spec.pgram(df$food_loss_kg, spans = 2))
with(raw.spec_fl[[1]], 1 / freq[which.max(spec)])
## [1] 3
# food waste (solid + liquid)
# plot.spectrum(dt$allWasteKg)
raw.spec_fw <- list(spec.pgram(df$food_waste_kg, spans = 2))
with(raw.spec_fw[[1]], 1 / freq[which.max(spec)])
## [1] 6
# solid food waste
# plot.spectrum(dt$allWasteKg)
raw.spec_sfw <- list(spec.pgram(df$solid_waste_kg, spans = 2))
with(raw.spec_sfw[[1]], 1 / freq[which.max(spec)])
## [1] 8.571429
# liquid food waste
# plot.spectrum(dt$allWasteKg)
raw.spec_lfw <- list(spec.pgram(df$liquid_waste_kg, spans = 2))
with(raw.spec_lfw[[1]], 1 / freq[which.max(spec)])
## [1] 6
Food waste shows a period of roughly 6 days, whereas food loss shows a cycle of approximately 3 days or 20 days.
# Clean up: remove every object from the workspace except df and AdjMat.
rm(list = setdiff(ls(), c("df", "AdjMat")))